/* This file is part of swapper project
 *
 * Copyright (C) 2020 The Swapper Project Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.swapper.regexp;

import java.util.*;
import java.util.function.Consumer;

/**
 * The regular expression character classes.
 * Including normal, predefined, and POSIX character classes.
 */
final class CharacterClasses implements Iterable<CharacterRange>, Weighty {
  /**
   * Predefined character classes.
   * Matches any character other than '\n' and '\r'.
   * Pattern: '.', is equivalent to '[^\n\r]'.
   * Avoid range high surrogate and low surrogate.
   */
  public static final CharacterClasses ANY = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x000009));
      ranges.add(new CharacterRange(0x00000B, 0x00000C));
      ranges.add(new CharacterRange(0x00000E, 0x00D7FF));
      ranges.add(new CharacterRange(0x00E000, 0x10FFFF));
    }
  );

  /**
   * Predefined character classes.
   * Matches any character other than '\n' and '\r'
   * within ASCII graph characters([0x21-0x7e]).
   * Pattern: '.', is equivalent to '[^\n\r]'.
   */
  public static final CharacterClasses ANY_WITHIN_GRAPH = create(ranges ->
    ranges.add(new CharacterRange(0x000020, 0x00007E))
  );

  /**
   * Predefined character classes.
   * Matches digit character({@link #POSIX_DIGIT}).
   * Pattern: '\d', is equivalent to '[0-9]'.
   */
  public static final CharacterClasses DIGIT = create(ranges ->
    ranges.add(new CharacterRange(0x000030, 0x000039))
  );

  /**
   * Predefined character classes.
   * Matches not digit character.
   * Pattern: '\D', is equivalent to '[^0-9]'.
   * Avoid range high surrogate and low surrogate.
   */
  public static final CharacterClasses NON_DIGIT = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x00002F));
      ranges.add(new CharacterRange(0x00003A, 0x00D7FF));
      ranges.add(new CharacterRange(0x00E000, 0x10FFFF));
    }
  );

  /**
   * Predefined character classes.
   * Matches not digit character within ASCII graph characters([0x21-0x7e]).
   * Pattern: '\D', is equivalent to '[^0-9]'.
   */
  public static final CharacterClasses NON_DIGIT_WITHIN_GRAPH = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x00002F));
      ranges.add(new CharacterRange(0x00003A, 0x00007E));
    }
  );

  /**
   * Predefined character classes.
   * Matches whitespace character({@link #POSIX_SPACE}).
   * Pattern: '\s', is equivalent to '[\t-\r\x20]'.
   */
  public static final CharacterClasses WHITESPACE = create(ranges -> {
      ranges.add(new CharacterRange(0x000009, 0x00000D));
      ranges.add(new CharacterRange(0x000020));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-whitespace character.
   * Pattern: '\S', is equivalent to '[^\t-\r\x20]'.
   * Avoid range high surrogate and low surrogate.
   */
  public static final CharacterClasses NON_WHITESPACE = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x000008));
      ranges.add(new CharacterRange(0x00000E, 0x00001F));
      ranges.add(new CharacterRange(0x000021, 0x00D7FF));
      ranges.add(new CharacterRange(0x00E000, 0x10FFFF));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-whitespace character within ASCII graph characters([0x21-0x7e]).
   * Pattern: '\S', is equivalent to '[^\t-\r\x20]'.
   */
  public static final CharacterClasses NON_WHITESPACE_WITHIN_GRAPH = create(ranges ->
    ranges.add(new CharacterRange(0x000021, 0x00007E))
  );

  /**
   * Predefined character classes.
   * Matches word character.
   * Pattern: '\w', is equivalent to '[0-9A-Z_a-z]'.
   */
  public static final CharacterClasses WORD = create(ranges -> {
      ranges.add(new CharacterRange(0x000030, 0x000039));
      ranges.add(new CharacterRange(0x000041, 0x00005A));
      ranges.add(new CharacterRange(0x00005F));
      ranges.add(new CharacterRange(0x000061, 0x00007A));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-word character.
   * Pattern: '\W', is equivalent to '[^0-9A-Z_a-z]'.
   * Avoid range high surrogate and low surrogate.
   */
  public static final CharacterClasses NON_WORD = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x00002F));
      ranges.add(new CharacterRange(0x00003A, 0x000040));
      ranges.add(new CharacterRange(0x00005B, 0x00005E));
      ranges.add(new CharacterRange(0x000060));
      ranges.add(new CharacterRange(0x00007B, 0x00D7FF));
      ranges.add(new CharacterRange(0x00E000, 0x10FFFF));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-word character within ASCII graph characters([0x21-0x7e]).
   * Pattern: '\W', is equivalent to '[^0-9A-Z_a-z]'.
   */
  public static final CharacterClasses NON_WORD_WITHIN_GRAPH = create(ranges -> {
      ranges.add(new CharacterRange(0x000020, 0x00002F));
      ranges.add(new CharacterRange(0x00003A, 0x000040));
      ranges.add(new CharacterRange(0x00005B, 0x00005E));
      ranges.add(new CharacterRange(0x000060));
      ranges.add(new CharacterRange(0x00007B, 0x00007E));
    }
  );

  /**
   * Predefined character classes.
   * Matches horizontal whitespace character.
   * Pattern: '\h', is equivalent to '[\t\xA0\u1680\u180e\u2000-\u200a\u202f\u205f\u3000]'.
   */
  public static final CharacterClasses HORIZONTAL_WHITESPACE = create(ranges -> {
      ranges.add(new CharacterRange(0x000009));
      ranges.add(new CharacterRange(0x0000A0));
      ranges.add(new CharacterRange(0x001680));
      ranges.add(new CharacterRange(0x00180E));
      ranges.add(new CharacterRange(0x002000, 0x00200A));
      ranges.add(new CharacterRange(0x00202F));
      ranges.add(new CharacterRange(0x00205F));
      ranges.add(new CharacterRange(0x003000));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-horizontal whitespace character.
   * Pattern: '\H', is equivalent to '[^\h]'.
   * Avoid range high surrogate and low surrogate.
   */
  public static final CharacterClasses NON_HORIZONTAL_WHITESPACE = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x000008));
      ranges.add(new CharacterRange(0x00000A, 0x00009F));
      ranges.add(new CharacterRange(0x0000A1, 0x00167F));
      ranges.add(new CharacterRange(0x001681, 0x00180D));
      ranges.add(new CharacterRange(0x00180F, 0x001FFF));
      ranges.add(new CharacterRange(0x00200B, 0x00202E));
      ranges.add(new CharacterRange(0x002030, 0x00205E));
      ranges.add(new CharacterRange(0x002060, 0x002FFF));
      ranges.add(new CharacterRange(0x003001, 0x00D7FF));
      ranges.add(new CharacterRange(0x00E000, 0x10FFFF));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-horizontal whitespace character within ASCII graph characters([0x21-0x7e]).
   * Pattern: '\H', is equivalent to '[^\h]'.
   */
  public static final CharacterClasses NON_HORIZONTAL_WHITESPACE_WITHIN_GRAPH = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x000008));
      ranges.add(new CharacterRange(0x00000A, 0x00007E));
    }
  );

  /**
   * Predefined character classes.
   * Matches vertical whitespace character.
   * Pattern: '\v', is equivalent to '[\n-\r\x85\u2028\u2029]'.
   */
  public static final CharacterClasses VERTICAL_WHITESPACE = create(ranges -> {
      ranges.add(new CharacterRange(0x00000A, 0x00000D));
      ranges.add(new CharacterRange(0x000085, 0x000085));
      ranges.add(new CharacterRange(0x002028, 0x002029));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-vertical whitespace character.
   * Pattern: '\V', is equivalent to '[^\v]'.
   * Avoid range high surrogate and low surrogate.
   */
  public static final CharacterClasses NON_VERTICAL_WHITESPACE = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x000009));
      ranges.add(new CharacterRange(0x00000E, 0x000084));
      ranges.add(new CharacterRange(0x000086, 0x002027));
      ranges.add(new CharacterRange(0x00202A, 0x00D7FF));
      ranges.add(new CharacterRange(0x00E000, 0x10FFFF));
    }
  );

  /**
   * Predefined character classes.
   * Matches non-vertical whitespace character within ASCII graph characters([0x21-0x7e]).
   * Pattern: '\V', is equivalent to '[^\v]'.
   */
  public static final CharacterClasses NON_VERTICAL_WHITESPACE_WITHIN_GRAPH = create(ranges -> {
      ranges.add(new CharacterRange(0x000000, 0x000009));
      ranges.add(new CharacterRange(0x00000E, 0x00007E));
    }
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches lower-case alphabetic character.
   * Pattern: '\p{Lower}', is equivalent to '[a-z]'.
   */
  public static final CharacterClasses POSIX_LOWER = create(ranges ->
    ranges.add(new CharacterRange(0x000061, 0x00007A))
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches upper-case alphabetic character.
   * Pattern: '\p{Upper}', is equivalent to '[A-Z]'.
   */
  public static final CharacterClasses POSIX_UPPER = create(ranges ->
    ranges.add(new CharacterRange(0x000041, 0x00005A))
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches all ASCII character.
   * Pattern: '\p{ASCII}', is equivalent to '[\x00-\x7F]'.
   */
  public static final CharacterClasses POSIX_ASCII = create(ranges ->
    ranges.add(new CharacterRange(0x000000, 0x00007F))
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches alphabetic character.
   * Pattern: '\p{Alpha}', is equivalent to '[\p{Lower}\p{Upper}]' or '[A-Za-z]'.
   */
  public static final CharacterClasses POSIX_ALPHA = create(ranges -> {
    ranges.add(new CharacterRange(0x000041, 0x00005A));
    ranges.add(new CharacterRange(0x000061, 0x00007A));
  });

  /**
   * POSIX character classes (US-ASCII only).
   * Matches decimal digit character({@link #DIGIT}).
   * Pattern: '\p{Digit}', is equivalent to '[0-9]'.
   */
  public static final CharacterClasses POSIX_DIGIT = create(ranges ->
    ranges.add(new CharacterRange(0x000030, 0x000039))
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches alphanumeric character.
   * Pattern: '\p{Alnum}', is equivalent to '[\p{Alpha}\p{Digit}]' or '[0-9A-Za-z]'.
   */
  public static final CharacterClasses POSIX_ALNUM = create(ranges -> {
    ranges.add(new CharacterRange(0x000030, 0x000039));
    ranges.add(new CharacterRange(0x000041, 0x00005A));
    ranges.add(new CharacterRange(0x000061, 0x00007A));
  });

  /**
   * POSIX character classes (US-ASCII only).
   * Matches punctuation(One of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~) character.
   * Pattern: '\p{Punct}'.
   */
  public static final CharacterClasses POSIX_PUNCT = create(ranges -> {
    ranges.add(new CharacterRange(0x000021, 0x00002F));
    ranges.add(new CharacterRange(0x00003A, 0x000040));
    ranges.add(new CharacterRange(0x00005B, 0x000060));
    ranges.add(new CharacterRange(0x00007B, 0x00007E));
  });

  /**
   * POSIX character classes (US-ASCII only).
   * Matches visible character.
   * Pattern: '\p{Graph}', is equivalent to '[\p{Alnum}\p{Punct}]'.
   */
  public static final CharacterClasses POSIX_GRAPH = create(ranges ->
    ranges.add(new CharacterRange(0x000021, 0x00007E))
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches printable character.
   * Pattern: '\p{Print}', is equivalent to '[\x20\p{Graph}]'.
   */
  public static final CharacterClasses POSIX_PRINT = create(ranges ->
    ranges.add(new CharacterRange(0x000020, 0x00007E))
  );

  /**
   * POSIX character classes (US-ASCII only).
   * Matches space or tab character.
   * Pattern: '\p{Blank}', is equivalent to '[\t\x20]'.
   */
  public static final CharacterClasses POSIX_BLANK = create(ranges -> {
    ranges.add(new CharacterRange(0x000009));
    ranges.add(new CharacterRange(0x000020));
  });

  /**
   * POSIX character classes (US-ASCII only).
   * Matches control character.
   * Pattern: '\p{Cntrl}', is equivalent to '[\x00-\x1F\x7F]'.
   */
  public static final CharacterClasses POSIX_CNTRL = create(ranges -> {
    ranges.add(new CharacterRange(0x000000, 0x00001F));
    ranges.add(new CharacterRange(0x00007F));
  });

  /**
   * POSIX character classes (US-ASCII only).
   * Matches hexadecimal digit character.
   * Pattern: '\p{XDigit}', is equivalent to '[0-9A-Fa-f]'.
   */
  public static final CharacterClasses POSIX_XDIGIT = create(ranges -> {
    ranges.add(new CharacterRange(0x000030, 0x000039));
    ranges.add(new CharacterRange(0x000041, 0x000046));
    ranges.add(new CharacterRange(0x000061, 0x000066));
  });

  /**
   * POSIX character classes (US-ASCII only).
   * Matches whitespace character({@link #WHITESPACE}).
   * Pattern: '\p{Space}', is equivalent to '[\t-\r\x20]'.
   */
  public static final CharacterClasses POSIX_SPACE = create(ranges -> {
    ranges.add(new CharacterRange(0x000009, 0x00000D));
    ranges.add(new CharacterRange(0x000020));
  });

  private int finalWeight;
  private boolean unmodifiable;
  private final TreeSet<CharacterRange> ranges = new TreeSet<>();

  private static CharacterClasses create(Consumer<Collection<CharacterRange>> consumer) {
    CharacterClasses classes = new CharacterClasses();
    consumer.accept(classes.ranges);
    return classes.unmodifiable();
  }

  /**
   * After this method is called, the instance is forbidden to be modified,
   * and an {@link UnsupportedOperationException} is thrown when you attempt to modify it.
   */
  public CharacterClasses unmodifiable() {
    finalWeight = weight();
    unmodifiable = true;
    return this;
  }

  /**
   * Merges a specified range into a classes.
   *
   * @param range The range to be merged.
   * @throws NullPointerException If {@code range} is null.
   */
  public void union(CharacterRange range) {
    if (unmodifiable) {
      throw new UnsupportedOperationException("union");
    }
    int joinBegin = range.begin(), joinEnd = range.end();
    int mergeBegin = joinBegin, mergeEnd = joinEnd;
    Iterator<CharacterRange> itr = ranges.iterator();
    while (itr.hasNext()) {
      CharacterRange joined = itr.next();
      int joinedBegin = joined.begin(), joinedEnd = joined.end();
      if (joinedEnd >= joinBegin - 1 && joinedBegin <= joinEnd + 1) {
        if (joinedBegin < mergeBegin) {
          mergeBegin = joinedBegin;
        }
        if (joinedEnd > mergeEnd) {
          mergeEnd = joinedEnd;
        }
        itr.remove();
      }
    }
    ranges.add(new CharacterRange(mergeBegin, mergeEnd));
  }

  /**
   * Merges a specified range collection into a classes.
   *
   * @param ranges The character ranges to be merged.
   * @throws NullPointerException If {@code ranges} is null.
   * @see #union(CharacterRange)
   */
  public void unions(Iterable<CharacterRange> ranges) {
    if (unmodifiable) {
      throw new UnsupportedOperationException("unions");
    }
    for (CharacterRange range : ranges) {
      union(range);
    }
  }

  /**
   * Computes the except set of the current instance
   * from {@link Character#MIN_CODE_POINT} to {@link Character#MAX_CODE_POINT}.
   * Avoid range high surrogate and low surrogate.
   *
   * @return the except set of the current instance.
   */
  public CharacterClasses except() {
    CharacterClasses result = new CharacterClasses();
    int currentBegin = Character.MIN_CODE_POINT;
    for (CharacterRange temp : ranges) {
      int tempBegin = temp.begin(), tempEnd = temp.end();
      if (currentBegin < tempBegin) {
        if (tempBegin > Character.MIN_HIGH_SURROGATE) {
          tempBegin = Character.MIN_HIGH_SURROGATE;
        }
        result.ranges.add(new CharacterRange(currentBegin, tempBegin - 1));
        if (tempEnd == Character.MAX_CODE_POINT) {
          return result;
        }
      }
      if (tempEnd >= Character.MIN_HIGH_SURROGATE - 1 && tempEnd < Character.MAX_LOW_SURROGATE) {
        tempEnd = Character.MAX_LOW_SURROGATE;
      }
      currentBegin = tempEnd + 1;
    }
    if (currentBegin < Character.MAX_CODE_POINT) {
      if (currentBegin < Character.MIN_HIGH_SURROGATE) {
        result.ranges.add(new CharacterRange(currentBegin, Character.MIN_HIGH_SURROGATE - 1));
        currentBegin = Character.MAX_LOW_SURROGATE + 1;
      }
      result.ranges.add(new CharacterRange(currentBegin, Character.MAX_CODE_POINT));
    }
    return result;
  }

  public int size() {
    return ranges.size();
  }

  public boolean isEmpty() {
    return ranges.isEmpty();
  }

  public CharacterRange first() {
    return ranges.first();
  }

  public CharacterRange last() {
    return ranges.last();
  }

  @Override
  public int weight() {
    return unmodifiable ? finalWeight : ranges.stream().mapToInt(CharacterRange::weight).sum();
  }

  @SuppressWarnings("all")
  @Override
  public Iterator<CharacterRange> iterator() {
    final Iterator<CharacterRange> itr = ranges.iterator();
    if (unmodifiable) {
      return new Iterator<CharacterRange>() {
        @Override
        public boolean hasNext() {
          return itr.hasNext();
        }

        @Override
        public CharacterRange next() {
          return itr.next();
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException("remove");
        }
      };
    }
    return itr;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o instanceof CharacterClasses) {
      CharacterClasses that = (CharacterClasses) o;
      return Objects.equals(ranges, that.ranges);
    }
    return false;
  }

  @Override
  public int hashCode() {
    return Objects.hash(finalWeight, unmodifiable, ranges);
  }

  @Override
  public String toString() {
    return ranges.toString();
  }
}
